library(Seurat)
## Attaching SeuratObject
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(stringr)
library(tibble)
library(patchwork)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

DE table

First we load the sister pair DE tables and filter for:

  • absolute avg_log2FC > 0.5 (~41% increase)

  • p_val_adj < 0.01

DE_list <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_poly_sis_markers.rds")

for (i in seq(DE_list)) {
    DE_list[[i]] <- DE_list[[i]] %>% 
    arrange(desc(avg_log2FC)) %>% 
    filter(abs(avg_log2FC) > 0.5) %>% 
    filter(p_val_adj < 0.01)
}

DE_table <- do.call(rbind, DE_list)
dim(DE_table)
## [1] 807   8

delta pct distribution

par(mfrow = c(2,2))
hist(abs(DE_list[[1]]$delta_pct), breaks = 20)
abline(v = 0.1, lty = "dashed", col = "red")
hist(abs(DE_list[[2]]$delta_pct), breaks = 20)
abline(v = 0.1, lty = "dashed", col = "red")
hist(abs(DE_list[[4]]$delta_pct), breaks = 20)
abline(v = 0.1, lty = "dashed", col = "red")
hist(abs(DE_list[[5]]$delta_pct), breaks = 20)
abline(v = 0.1, lty = "dashed", col = "red")

Now we filter the DE lists for absolute delta percentage > 0.1.

for (i in seq(DE_list)) {
  DE_list[[i]] <- DE_list[[i]] %>% 
  filter(abs(delta_pct) > 0.1)
}

DE_table <- do.call(rbind, DE_list)
dim(DE_table)
## [1] 671   8

Broad clusters

broad_order <- c("progenitors",
      "FP",
      "RP",
      "FP/RP",
      "neurons",
      "OPC",
      "MFOL",
      "pericytes",
      "microglia",
      "blood",
      "vasculature"
      )

Integrated data

Load the integrated control and poly data.

int_path <- "Gg_ctrl_poly_int_seurat_250723"

my.se <- readRDS(paste0("~/spinal_cord_paper/data/", int_path, ".rds"))
  annot_int <- read.csv(list.files("~/spinal_cord_paper/annotations",
                               pattern = str_remove(int_path, "_seurat_\\d{6}"),
                               full.names = TRUE))
  
  if(length(table(annot_int$number)) != length(table(my.se$seurat_clusters))) {
     stop("Number of clusters must be identical!")
  }
  
  # rename for left join
  annot_int <- annot_int %>% 
    mutate(fine = paste(fine, number, sep = "_")) %>% 
    mutate(number = factor(number, levels = 1:nrow(annot_int))) %>% 
    rename(seurat_clusters = number)
  
  ord_levels <- annot_int$fine[order(match(annot_int$broad, broad_order))]
   
  # add cluster annotation to meta data
  my.se@meta.data <- my.se@meta.data %>% 
    rownames_to_column("rowname") %>% 
    left_join(annot_int, by = "seurat_clusters") %>% 
    mutate(fine = factor(fine, levels = ord_levels)) %>% 
    mutate(seurat_clusters = factor(seurat_clusters, levels = str_extract(ord_levels, "\\d{1,2}$"))) %>% 
    column_to_rownames("rowname")
  
  ctrl_poly_int_combined_labels <- readRDS("~/spinal_cord_paper/annotations/ctrl_poly_int_combined_labels.rds")
  
  my.se <- AddMetaData(my.se, ctrl_poly_int_combined_labels)

DimPlot

DimPlot(
  my.se,
  group.by = "annot_sample",
  reduction = "tsne",
  label = TRUE,
  repel = TRUE
  ) +
  NoLegend()

Cluster order

Get the cluster order from the spearman correlation heatmap of the control and poly integrated data. Then we filter for the neuronal clusters only.

corr_heatmap <- readRDS("~/spinal_cord_paper/output/heatmap_spearman_ctrl_poly.rds")

#heatmap order
htmp_order <- data.frame("label" = corr_heatmap[["gtable"]]$grobs[[4]]$label) %>% 
  mutate(label = str_remove(label, "_int")) %>% 
  mutate(label_ordered = paste(str_sub(label,6 ,-1), str_sub(label, 1, 4), sep = "_"))

my.se@meta.data <- my.se@meta.data %>%
  mutate(annot_sample = factor(annot_sample, levels = htmp_order$label_ordered))

Idents(my.se) <- "annot_sample"

# filter for the neuronal clusters
my.se <- subset(my.se, idents = htmp_order$label_ordered[grepl("neurons|MN|CSF", htmp_order$label_ordered)])

DimPlot(
  my.se,
  group.by = "annot_sample",
  reduction = "tsne",
  label = TRUE,
  repel = TRUE
  ) +
  NoLegend()

my.se@active.assay <- "RNA"

Dotplot

# Dotplot of sister pair makrers
pl_all <- modplots::mDotPlot2(my.se,
                    group.by = "annot_sample", 
                      # reverse order of unique genes so number one is on top
                    features = rev(unique(DE_table$Gene.stable.ID)),
                    gnames = modplots::gnames,
          cols = c("lightgrey", "black")) +
    theme(axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5)) +
    coord_flip()
## 
pl_all

pdf("~/spinal_cord_paper/figures/Sister_pair_DE_dotplot.pdf", width = 15, height = 32)
  pl_all

  
DE_table$Gene.name[duplicated(DE_table$Gene.stable.ID)]
##  [1] "HES5"               "MAP6"               "GNG5"              
##  [4] "ST18"               "GAD1"               "FABP3"             
##  [7] "SYT1"               "SLC32A1"            "KIF5C"             
## [10] "HMP19"              "GALNT9"             "VSTM2L"            
## [13] "HINTW"              "DNER"               "CRABP-I"           
## [16] "RELN"               "PAX2"               "NEUROD2"           
## [19] "CHL1"               "LHX1"               "NRXN3"             
## [22] "ENSGALG00000029521" "BHLHE22"            "SPOCK1"            
## [25] "SSTR1"              "SLC32A1"            "NCALD"             
## [28] "ID2"                "GRIK3"              "GAD2"              
## [31] "PTPRK"              "GABRG3"             "GAD1"              
## [34] "RUNX1T1"            "HPCAL1"             "ZEB2"              
## [37] "GALNT9"             "ENSGALG00000013212" "MDK"               
## [40] "ZFPM2"              "RELN"               "NEUROD6"           
## [43] "CPLX1"              "LAMP5"              "WNT5A"             
## [46] "HINTW"              "SOX4"               "DKK3"              
## [49] "UNC13B"             "ATP1B1"             "GALNT17"           
## [52] "RASD1"              "ENSGALG00000051980" "PLXNA4"            
## [55] "DACT2"              "DISP3"              "MVB12B"            
## [58] "ENSGALG00000054223" "CNTN4"              "ZNF423"            
## [61] "CBLB"               "FKBP1B"             "CELF2"             
## [64] "EPB41L4A"           "PXYLP1"             "ENSGALG00000023640"
## [67] "CNTN2"              "MRPS6"              "PPP3CA"            
## [70] "NFIX"               "NFIA"               "SOX8"              
## [73] "DRAXIN"             "CRABP-I"            "NHLH1"             
## [76] "TAC1"               "VSTM2L"             "CPNE2"             
## [79] "PRKCA"

Individual dot plots

# select top50 by log2FC 
for (i in seq(DE_list)) {
    DE_list[[i]] <- DE_list[[i]] %>%
    slice_max(order_by = abs(avg_log2FC), n = 50) %>% 
    arrange(desc(avg_log2FC))
}

p1 <- modplots::mDotPlot2(my.se,
                    group.by = "annot_sample", 
                    assay = "RNA",
                      # reverse order of DE genes so number one is on top
                    features = rev(DE_list[[1]]$Gene.stable.ID),
                    gnames = modplots::gnames,
          cols = c("lightgrey", "black")) +
    theme(axis.text.x = element_blank()) +
    coord_flip() +
    xlab(names(DE_list)[1]) +
    ylab(element_blank())

p2 <- modplots::mDotPlot2(my.se,
                    group.by = "annot_sample",  
                    assay = "RNA",
                      # reverse order of DE genes so number one is on top
                    features = rev(DE_list[[2]]$Gene.stable.ID),
                    gnames = modplots::gnames,
          cols = c("lightgrey", "black")) +
    theme(axis.text.x = element_blank()) +
    coord_flip() +
    xlab(names(DE_list)[2]) +
    ylab(element_blank())

p3 <- modplots::mDotPlot2(my.se,
                    group.by = "annot_sample",  
                    assay = "RNA",
                      # reverse order of DE genes so number one is on top
                    features = rev(DE_list[[5]]$Gene.stable.ID),
                    gnames = modplots::gnames,
          cols = c("lightgrey", "black")) +
    theme(axis.text.x = element_blank()) +
    coord_flip() +
    xlab(names(DE_list)[5]) +
    ylab(element_blank())

p4 <- modplots::mDotPlot2(my.se,
                    group.by = "annot_sample",  
                    assay = "RNA",
                      # reverse order of DE genes so number one is on top
                    features = rev(DE_list[[4]]$Gene.stable.ID),
                    gnames = modplots::gnames,
          cols = c("lightgrey", "black")) +
    theme(axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5)) +
    coord_flip() +
    xlab(names(DE_list)[4]) +
    ylab(element_blank())
layout <- "CCDD
           CC##"

pdf("~/spinal_cord_paper/figures/Supp_Fig_5_ctrl_poly_dotplot_individual.pdf", height = 21, width = 7)
# without labels for proper alignment
(p1 + p2 + plot_layout(guides = "collect")) /
(p3 + p4 + plot_layout(guides = "collect", design = layout)) & 
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank())
# with labels to transfer in illustrator
(p1 + p2 + plot_layout(guides = "collect")) /
(p3 + p4 + plot_layout(guides = "collect", design = layout))

dev.off()
## png 
##   2

Volcanoplots

p.adj <- 0.01
l2fc <- 0

# select top50 by log2FC 
for (i in seq(DE_list)) {
    DE_list[[i]] <- DE_list[[i]] %>% 
    mutate(delta_pct_sign = case_when(
      delta_pct < 0 ~ "-",
      delta_pct > 0 ~ "+",
      delta_pct == 0 ~ "0"
    ))
}
 

toplot <- do.call(rbind, DE_list[c(4,1)]) %>% 
  rownames_to_column("contrast") %>% 
  mutate(contrast = str_remove(contrast, "\\.\\d{1,2}")) %>% 
  mutate(contrast = str_replace_all(contrast, " ", "_"))

volplot <- ggplot(data = toplot,
       aes(x = avg_log2FC,
           y = -log10(p_val_adj),
           label = Gene.name,
           color = delta_pct_sign,
           size = abs(delta_pct)
       )) +
  geom_point(shape = 21) +
  geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
  geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
  scale_color_manual(values = c("goldenrod3", "black")) +
  scale_size_continuous(range = c(0.5, 4)) +
  facet_wrap("contrast", ncol = 1, scales = "free_y") +
  ylab("-log10(padj)") +
  theme_bw()

ggplotly(volplot)
pdf("~/spinal_cord_paper/figures/Fig_5_volcanoplots.pdf", width = 5, height = 10)
(volplot +
  ggrepel::geom_text_repel(size = 3, color = "black"))
## Warning: ggrepel: 13 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Specific markers

Find Markers for clusters 11_ctrl, 16_ctrl, and 15_poly.

gnames <- modplots::gnames

markers <- list()

clu <- c("inhibitory_neurons_16_ctrl",
         "excitatory neurons_11_ctrl",
         "excitatory_neurons_15_poly")

for (i in seq(clu)) {  
  markers[[i]] <- FindMarkers(
      my.se,
      ident.1 = clu[i],
      group.by = "annot_sample",
      assay = "RNA",
      verbose = FALSE,
      only.pos = TRUE, # we look for overexpressed, specific markers
      min.pct = 0.25,
      logfc.threshold =  0.25,
      latent.vars = c("CC.Difference.seurat"),
      test.use = "MAST"
    ) %>%
      tibble::rownames_to_column("Gene.stable.ID") %>%
      dplyr::left_join(gnames, by = "Gene.stable.ID") %>%
      dplyr::arrange(-avg_log2FC) %>%
      dplyr::filter(p_val_adj < 0.05) %>%
      dplyr::filter(abs(avg_log2FC) > 0.5) %>%
    dplyr::mutate(delta_pct = abs(pct.1 - pct.2))
}
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
## 
## Done!
## Combining coefficients and standard errors
## Calculating log-fold changes
## Calculating likelihood ratio tests
## Refitting on reduced model...
## 
## Done!
names(markers) <- clu

Specific marker dotplot

Plot the top 50 markers for clusters 11_ctrl, 16_ctrl, and 15_poly.

n <- 50

mark_plot <- list()

for (i in seq(clu)) {
  mark_plot[[i]] <- modplots::mDotPlot2(my.se,
                    group.by = "annot_sample", 
                      # reverse order of markers so number one is on top
                    features = rev(markers[[i]][1:n,"Gene.stable.ID"]),
                    gnames = modplots::gnames) +
    theme(axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5)) +
    coord_flip() +
  scale_colour_gradientn(colours = c("gray90","gray80","yellow", "orange", "red", "darkred", "darkred")) +
  ggtitle(paste0("Top ", n, " markers by log2FC for ", clu[i]))

}
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
mark_plot[[1]]

mark_plot[[2]]

mark_plot[[3]]

pdf("~/spinal_cord_paper/figures/Sister_pair_neuron_marker_dotplots.pdf", width = 14, height = n/3)
mark_plot[[1]]
mark_plot[[2]]
mark_plot[[3]]
# Date and time of Rendering
Sys.time()
## [1] "2024-06-17 14:19:41 CEST"
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /scicore/soft/apps/OpenBLAS/0.3.1-GCC-7.3.0-2.30/lib/libopenblas_sandybridgep-r0.3.1.so
## 
## locale:
## [1] en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] plotly_4.10.0      patchwork_1.1.1    tibble_3.1.8       stringr_1.4.0     
## [5] ggplot2_3.3.3      dplyr_1.0.10       SeuratObject_4.0.2 Seurat_4.0.5      
## 
## loaded via a namespace (and not attached):
##   [1] plyr_1.8.6                  igraph_1.2.6               
##   [3] lazyeval_0.2.2              sp_1.4-5                   
##   [5] splines_4.1.0               crosstalk_1.1.1            
##   [7] listenv_0.8.0               scattermore_0.7            
##   [9] GenomeInfoDb_1.28.0         digest_0.6.27              
##  [11] htmltools_0.5.1.1           fansi_0.5.0                
##  [13] magrittr_2.0.1              memoise_2.0.0              
##  [15] tensor_1.5                  cluster_2.1.2              
##  [17] ROCR_1.0-11                 globals_0.16.2             
##  [19] Biostrings_2.60.0           matrixStats_0.58.0         
##  [21] modplots_1.0.0              spatstat.sparse_3.0-0      
##  [23] prettyunits_1.1.1           colorspace_2.0-1           
##  [25] blob_1.2.1                  ggrepel_0.9.1              
##  [27] xfun_0.34                   RCurl_1.98-1.3             
##  [29] crayon_1.4.1                jsonlite_1.7.2             
##  [31] spatstat.data_3.0-0         survival_3.2-11            
##  [33] zoo_1.8-9                   glue_1.6.2                 
##  [35] polyclip_1.10-0             gtable_0.3.0               
##  [37] zlibbioc_1.38.0             XVector_0.32.0             
##  [39] leiden_0.3.9                DelayedArray_0.18.0        
##  [41] SingleCellExperiment_1.14.1 future.apply_1.7.0         
##  [43] BiocGenerics_0.38.0         abind_1.4-5                
##  [45] scales_1.1.1                pheatmap_1.0.12            
##  [47] DBI_1.1.1                   miniUI_0.1.1.1             
##  [49] Rcpp_1.0.7                  progress_1.2.2             
##  [51] viridisLite_0.4.0           xtable_1.8-4               
##  [53] reticulate_1.22             spatstat.core_2.1-2        
##  [55] bit_4.0.4                   stats4_4.1.0               
##  [57] htmlwidgets_1.5.3           httr_1.4.2                 
##  [59] RColorBrewer_1.1-2          ellipsis_0.3.2             
##  [61] ica_1.0-2                   pkgconfig_2.0.3            
##  [63] farver_2.1.0                sass_0.4.0                 
##  [65] uwot_0.1.10                 deldir_1.0-6               
##  [67] utf8_1.2.1                  tidyselect_1.2.0           
##  [69] labeling_0.4.2              rlang_1.0.6                
##  [71] reshape2_1.4.4              later_1.2.0                
##  [73] AnnotationDbi_1.54.0        munsell_0.5.0              
##  [75] tools_4.1.0                 cachem_1.0.5               
##  [77] cli_3.4.1                   generics_0.1.3             
##  [79] RSQLite_2.2.7               ggridges_0.5.3             
##  [81] org.Gg.eg.db_3.13.0         evaluate_0.20              
##  [83] fastmap_1.1.0               yaml_2.2.1                 
##  [85] goftest_1.2-2               knitr_1.41                 
##  [87] bit64_4.0.5                 fitdistrplus_1.1-6         
##  [89] purrr_0.3.4                 RANN_2.6.1                 
##  [91] KEGGREST_1.32.0             pbapply_1.4-3              
##  [93] future_1.30.0               nlme_3.1-152               
##  [95] mime_0.10                   compiler_4.1.0             
##  [97] rstudioapi_0.13             png_0.1-7                  
##  [99] spatstat.utils_3.0-1        bslib_0.2.5.1              
## [101] stringi_1.6.2               highr_0.9                  
## [103] lattice_0.20-44             Matrix_1.3-3               
## [105] vctrs_0.5.1                 pillar_1.8.1               
## [107] lifecycle_1.0.3             spatstat.geom_3.0-3        
## [109] lmtest_0.9-38               jquerylib_0.1.4            
## [111] RcppAnnoy_0.0.19            bitops_1.0-7               
## [113] data.table_1.14.0           cowplot_1.1.1              
## [115] irlba_2.3.3                 GenomicRanges_1.44.0       
## [117] httpuv_1.6.1                R6_2.5.0                   
## [119] promises_1.2.0.1            KernSmooth_2.23-20         
## [121] gridExtra_2.3               IRanges_2.26.0             
## [123] parallelly_1.33.0           codetools_0.2-18           
## [125] MASS_7.3-54                 assertthat_0.2.1           
## [127] MAST_1.18.0                 SummarizedExperiment_1.22.0
## [129] withr_2.4.2                 sctransform_0.3.3          
## [131] GenomeInfoDbData_1.2.6      S4Vectors_0.30.0           
## [133] hms_1.1.0                   mgcv_1.8-35                
## [135] parallel_4.1.0              grid_4.1.0                 
## [137] rpart_4.1-15                tidyr_1.1.3                
## [139] rmarkdown_2.17              MatrixGenerics_1.4.0       
## [141] Rtsne_0.15                  Biobase_2.52.0             
## [143] shiny_1.6.0